
## Name: Carlos Gould
## Project: Census Data Analysis for Ecuador 
## Task: Ambient PM2.5

# 0. Libraries -------

library(tidyverse)
library(rgdal)
library(sf)
library(raster)
library(rasterVis)
library(viridis)
library(profmem)
library(tidyverse)
library(ncdf4)
library(mgcv)

# 1. Data ---------

# Canton ID matching and population
canton_Ns <- read_csv("~/BurkeLab Dropbox/Carlos Gould/Ecuador National Health and LPG Analysis/Manuscript/Submission/LGH/Data/canton_id_universal.csv") %>%
  dplyr::select(-`...1`)


# Shapefiles
ecu_shp1 <- sf::st_read("~/Downloads/ecu_adm_inec_20190724_shp/ecu_admbnda_adm1_inec_20190724.shp")
ecu_shp2 <- sf::st_read("~/Downloads/ecu_adm_inec_20190724_shp/ecu_admbnda_adm2_inec_20190724.shp")
ecu_shp2_df_bind <- read_rds("~/BurkeLab Dropbox/Carlos Gould/Ecuador National Health and LPG Analysis/Code/May2020/OtherCovariates/period-avgs/ecu_shp2_df_bind.rds")
cf_province <- read_rds("~/BurkeLab Dropbox/Carlos Gould/Ecuador National Health and LPG Analysis/Code/DataProducts/CF_Predictions/province_cf_period_avg.rds")

# Main data ------
full_df <- read_rds("~/Downloads/gould_ehp_ecuador_cf_df.rds")

path <- "~/BurkeLab Dropbox/Carlos Gould/Ecuador National Health and LPG Analysis/Manuscript/Submission/LGH/Data/Ambient/"
ecu_pm25_1998 <- read_csv(paste0(path, "ecu_pm25_1998.csv"))
ecu_pm25_1999 <- read_csv(paste0(path, "ecu_pm25_1999.csv"))
ecu_pm25_2000 <- read_csv(paste0(path, "ecu_pm25_2000.csv"))
ecu_pm25_2001 <- read_csv(paste0(path, "ecu_pm25_2001.csv"))
ecu_pm25_2002 <- read_csv(paste0(path, "ecu_pm25_2002.csv"))
ecu_pm25_2003 <- read_csv(paste0(path, "ecu_pm25_2003.csv"))

ecu_pm25_2008 <- read_csv(paste0(path, "ecu_pm25_2008.csv"))
ecu_pm25_2009 <- read_csv(paste0(path, "ecu_pm25_2009.csv"))
ecu_pm25_2010 <- read_csv(paste0(path, "ecu_pm25_2010.csv"))
ecu_pm25_2011 <- read_csv(paste0(path, "ecu_pm25_2011.csv"))
ecu_pm25_2012 <- read_csv(paste0(path, "ecu_pm25_2012.csv"))


ecu_pm25_2015 <- read_csv(paste0(path, "ecu_pm25_2015.csv"))
ecu_pm25_2016<- read_csv(paste0(path, "ecu_pm25_2016.csv"))
ecu_pm25_2017 <- read_csv(paste0(path, "ecu_pm25_2017.csv"))
ecu_pm25_2018 <- read_csv(paste0(path, "ecu_pm25_2018.csv"))
ecu_pm25_2019 <- read_csv(paste0(path, "ecu_pm25_2019.csv"))

ecu_pm25 <- ecu_pm25_1998 %>% 
  mutate(Year = 1998) %>% 
  bind_rows(ecu_pm25_1999 %>% 
              mutate(Year = 1999),
            ecu_pm25_2000 %>% 
              mutate(Year = 2000),
            ecu_pm25_2001 %>% 
              mutate(Year = 2001),
            ecu_pm25_2002 %>% 
              mutate(Year = 2002),
            ecu_pm25_2003 %>% 
              mutate(Year = 2003),
            ecu_pm25_2008 %>% 
              mutate(Year = 2008),
            ecu_pm25_2009 %>%
              mutate(Year = 2009),
            ecu_pm25_2010 %>%
              mutate(Year = 2010),
            ecu_pm25_2011 %>%
              mutate(Year = 2011),
            ecu_pm25_2012 %>%
              mutate(Year = 2012),
            ecu_pm25_2015 %>%
              mutate(Year = 2015),
            ecu_pm25_2016 %>%
              mutate(Year = 2016),
            ecu_pm25_2017 %>%
              mutate(Year = 2017),
            ecu_pm25_2018 %>%
              mutate(Year = 2018),
            ecu_pm25_2019 %>%
              mutate(Year = 2019)) %>%
  mutate(period = ifelse(Year>=1988 & Year<=1992, "1988-1992",
                         ifelse(Year>=1999 & Year<=2003, "1999-2003",
                                ifelse(Year>=2008 & Year<=2012, "2008-2012",
                                       ifelse(Year>=2015 & Year<=2019, "2015-2019", NA))))) %>% 
  rename(ADM2_ES = `Zone ID`) 


ecu_shp2_df_bind_pm <- ecu_shp2_df_bind %>% 
  left_join(ecu_pm25) %>% 
  mutate(pm_mean = ifelse(!is.na(Mean), Mean, Median)) %>% 
  mutate(pm_mean = ifelse(pm_mean>1000, Median, pm_mean))

ecu_shp2_df_bind_pm %>% filter(is.na(pm_mean))


# 2. Process and combine data needed to run models ------

full_df_1_pm <- full_df_1 %>% 
  mutate(period = plyr::mapvalues(period,
                                  from=c("1990", "2001", "2010", "2015-2019"),
                                  to=c("1988-1992", "1999-2003", "2008-2012", "2015-2019"))) %>% 
  left_join(ecu_shp2_df_bind_pm %>% 
              as_tibble() %>% 
              dplyr::select(canton_id_universal, period, pm_mean) %>% 
              group_by(canton_id_universal, period) %>% 
              summarize(pm_mean = mean(pm_mean, na.rm=T)), by=c("canton_id_universal", "period"))


# Summary of data by period -------
full_df_1_pm %>% 
  group_by(period) %>%
  summarize(mean = mean(pm_mean, na.rm=T),
            sd = sd(pm_mean, na.rm=T),
            median = median(pm_mean, na.rm=T),
            q1 = quantile(pm_mean, probs=0.25, na.rm=T),
            q3 = quantile(pm_mean, probs=0.75, na.rm=T))


# 3. Models ---------

# FEGLM: CF10 associated with PM Mean, empty --------
pm_feglm_0_mod <- fixest::feglm(pm_mean ~
                            cf10 | 
                            
                            # percent_rural_corrected_ratio + # not w/ outcome
                            # electricidad_no + # associated
                            # materials_index + # associated
                            # elimina_servidas_red_pozo_letrina + # associated
                            # Literate_women + # associated
                            # InSchool_girls + # associated
                            # Idioma_indigena_self + # associated
                            
                            as.factor(canton_id_universal) +
                            as.factor(period)
                          ,
                          data=full_df_1_pm)

pm_feglm_0_mod

# FEGLM: CF10 associated with PM Mean, adjusted --------

pm_feglm_1_mod <- fixest::feglm(pm_mean ~
                                  cf10 + 
                                  
                                  percent_rural_corrected_ratio + # not w/ outcome
                                  electricidad_no + # associated
                                  materials_index + # associated
                                  elimina_servidas_red_pozo_letrina + # associated
                                  Literate_women + # associated
                                  InSchool_girls + # associated
                                  Idioma_indigena_self | # associated
                                  
                                  as.factor(canton_id_universal) +
                                  as.factor(period)
                                ,
                                data=full_df_1_pm)

pm_feglm_1_mod

pm_cf_mod_df <- tidy(pm_feglm_0_mod) %>% 
  mutate(model = "Empty") %>% 
  bind_rows(tidy(pm_feglm_1_mod)[1,] %>% 
              mutate(model = "Adjusted")) 

pm_cf_mod_df

pm_cf_fig <- ggplot(pm_cf_mod_df, aes(x=model, 
                         y=estimate, ymin=estimate-1.96*std.error, ymax=estimate+1.96*std.error))  + 
  geom_pointrange(size=0.4) + 
  xlab("") + 
  scale_x_discrete(limits=c("Empty", "Adjusted")) + 
  geom_hline(yintercept=0) + 
  annotate("text", x=1, y=-0.12, label="-0.25 (95% CI: -0.36 to -0.14)", size=3) + 
  annotate("text", x=2, y=-0.06, label="-0.20 (95% CI: -0.33 to -0.08)", size=3) + 
  ggtitle("Canton %CF and ambient PM2.5 concentrations") + 
  ylab("Canton mean ambient PM2.5 concentrations\nper 10 p.p. increase in %CF\n(micrograms per cubic meter)") + 
  theme_bw() + 
  coord_cartesian(ylim=c(-0.35, 0)) + 
  ggthemes::theme_clean() + 
  # coord_cartesian(ylim=c(-0.35, 0)) + 
  theme(axis.text.y = element_text(size=10, color="black"),
        axis.text.x = element_text(size=9, color="black"),
        axis.title = element_text(size=10, face="bold"),
        plot.title = element_text(size=10, face="bold"),
        legend.position="none",
        axis.line.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line.y = element_blank(),
        legend.title = element_blank(),
        plot.background = element_blank(),
        legend.text=element_text(size=12, color="black"))

# broom::tidy(pm_glm_1_mod)

alri5_feglm_0_mod_pm <- fixest::feglm(ALRI5_5_plus ~
                                         offset(log(under5population)) + 
                                         pm_mean |
                                         # cf10 |
                                         
                                         as.factor(canton_id_universal) +
                                         as.factor(period)
                                       ,
                                       data=full_df_1_pm %>% 
                                         filter(period!="1988-1992"),
                                       family = quasipoisson())

alri5_feglm_0_mod_pm

alri5_feglm_0_mod_threeperiod <- fixest::feglm(ALRI5_5_plus ~
                                offset(log(under5population)) + 
                                # pm_mean +
                                cf10 |
                                
                                as.factor(canton_id_universal) +
                                as.factor(period)
                              ,
                              data=full_df_1_pm %>% 
                                filter(period!="1988-1992"),
                              family = quasipoisson())

alri5_feglm_0_mod_threeperiod

alri5_feglm_0_mod_pm_cf <- fixest::feglm(ALRI5_5_plus ~
                            offset(log(under5population)) + 
                            pm_mean +
                            cf10 |
                            
                            as.factor(canton_id_universal) +
                            as.factor(period)
                          ,
                          data=full_df_1_pm %>% 
                            filter(period!="1988-1992"),
                          family = quasipoisson())

alri5_feglm_0_mod_pm_cf


alri5_feglm_1_mod_threeperiod <- fixest::feglm(ALRI5_5_plus ~
                                offset(log(under5population)) + 
                                # pm_mean +
                                cf10 +
                                
                                percent_rural_corrected_ratio + # not w/ outcome
                                electricidad_no + # associated
                                
                                materials_index + # associated
                                
                                elimina_servidas_red_pozo_letrina + # associated
                                
                                Literate_women + # associated
                                InSchool_girls + # associated
                                Idioma_indigena_self + # associated
                                
                                pcv3_use + # not really
                                vaccine_index + # not w/ cf
                                # 
                                ANC_use + # pretty much
                                ANC_visits_median_use |
                                
                                as.factor(canton_id_universal) +
                                as.factor(period)
                              ,
                              data=full_df_1_pm %>% 
                                filter(period!="1988-1992"),
                              family = quasipoisson())

alri5_feglm_1_mod_threeperiod


alri5_feglm_1_mod_pm <- fixest::feglm(ALRI5_5_plus ~
                                offset(log(under5population)) + 
                                pm_mean +
                                cf10 + 
                                
                                percent_rural_corrected_ratio + # not w/ outcome
                                electricidad_no + # associated
                               
                                materials_index + # associated
                             
                              elimina_servidas_red_pozo_letrina + # associated
                            
                              Literate_women + # associated
                              InSchool_girls + # associated
                              Idioma_indigena_self + # associated
                             
                              pcv3_use + # not really
                              vaccine_index + # not w/ cf
                              # 
                              ANC_use + # pretty much
                              ANC_visits_median_use |
                              
                              as.factor(canton_id_universal) +
                                as.factor(period)
                              ,
                              data=full_df_1_pm %>% 
                                filter(period!="1988-1992"),
                              family = quasipoisson())

alri5_feglm_1_mod_pm



alri5_pm_cf_mod_df <- tidy(alri5_feglm_0_mod_pm)[1,] %>% 
  mutate(model = "Empty, no %CF") %>% 
  bind_rows(tidy(alri5_feglm_0_mod_threeperiod)[1,] %>% 
              mutate(model = "Empty, no ambient PM2.5")) %>% 
  bind_rows(tidy(alri5_feglm_0_mod_pm_cf)[1:2,] %>% 
              mutate(model = "Empty, with ambient PM2.5")) %>% 
  bind_rows(tidy(alri5_feglm_1_mod_pm)[1:2,] %>% 
              mutate(model = "Adjusted")) %>% 
  mutate(term = plyr::mapvalues(term,
                            from=c("pm_mean", "cf10"),
                            to=c("Ambient PM2.5", "Clean Fuel Use (%)"))) %>% 
  mutate(irr = exp(estimate),
         irr.high = exp(estimate-(1.96*std.error)),
         irr.low = exp(estimate+(1.96*std.error)))



# 4. Produce figures -------


pm_period_plot <- ggplot(full_df_1_pm, aes(x=period, y=pm_mean, group=period, color=period, fill=period)) + 
  geom_hline(yintercept = 35, linetype="dashed", size=0.5) +
  geom_hline(yintercept = 5, linetype="solid", size=0.5) + 
  annotate("text", x="2015-2019", y=36, label="WHO Interim-1 Annual Guideline", size=3) + 
  annotate("text", x="2015-2019", y=6, label="WHO Annual Guideline", size=3, hjust=0.5) + 
  annotate("text", x="1988-1992", y=7.5, label="No data", size=3, hjust=0.5) + 
  geom_violin(width = 0.4, color="black", alpha=0.3) + 
  geom_boxplot(width=0.2, size=0.2, alpha=0.8, outlier.shape = NA, color = "black") +
  scale_y_continuous(breaks=c(5, 10, 15, 20, 25, 30, 35)) + 
  xlab("") +
  ylab("Difference in \nmean canton-period PM2.5 concentration\n(microgram per cubic meter)") + 
  ggtitle("") + 
  theme_clean() + 
  theme(axis.text.y = element_text(size=10, color="black"),
        axis.text.x = element_text(size=9, color="black"),
        axis.title = element_text(size=10, face="bold"),
        plot.title = element_text(size=10, face="bold"),
        legend.position="none",
        axis.line.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line.y = element_blank(),
        legend.title = element_blank(),
        plot.background = element_blank(),
        legend.text=element_text(size=12, color="black"))




pm_cf_alri5_fig <- ggplot(alri5_pm_cf_mod_df, aes(x=model, 
                         y=irr, ymin=irr.high, ymax=irr.low,
                         shape=term
                         ))  + 
  geom_pointrange(position = position_dodge2(width=0.2),
                  size=0.4) + 
  xlab("") + 
  scale_x_discrete(limits=c("Empty, no %CF", 
                            "Empty, no ambient PM2.5", 
                            "Empty, with ambient PM2.5",
                            "Adjusted")) + 
  geom_hline(yintercept=1) + 
  annotate("text", x=1, y=1.72, label="Ambient PM2.5", size=3) + 
  annotate("text", x=2, y=0.60, label="% Primary Clean Fuel", size=3) + 
  annotate("text", x=2.95, y=1.55, label="Ambient PM2.5", size=3) + 
  annotate("text", x=3.05, y=0.68, label="% Primary Clean Fuel", size=3) + 
  annotate("text", x=3.95, y=1.35, label="Ambient PM2.5", size=3) + 
  annotate("text", x=4.05, y=0.7, label="% Primary Clean Fuel", size=3) + 
  ggtitle("Canton %CF, ambient PM2.5 concentrations, and under-5 LRI mortality") + 
  ylab("Mortality rate ratio\nper 10 p.p. increase in %CF OR\nper 1 microgram per cubic meter increase in PM2.5") + 
  ggthemes::theme_clean() + 
  # coord_cartesian(ylim=c(-0.35, 0)) + 
  theme(axis.text.y = element_text(size=10, color="black"),
        axis.text.x = element_text(size=9, color="black"),
        axis.title = element_text(size=10, face="bold"),
        plot.title = element_text(size=10, face="bold"),
        legend.position="none",
        plot.background = element_blank(),
        axis.line.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line.y = element_blank(),
        legend.title = element_blank(),
        legend.text=element_text(size=12, color="black"))



pm_cf_fig_combined <- cowplot::plot_grid(
  pm_cf_fig,
  pm_cf_alri5_fig,
  rel_widths = c(0.6, 1),
  labels = c("c", "d")
)


pm_summary_reg_fig <- cowplot::plot_grid(
  pm_period_plot,
  pm_cf_fig_combined,
  nrow=2,
  labels=c("b", ""),
  rel_heights=c(0.7, 1)
)

# ggsave("~/Desktop/pm_summary_reg_fig.png",
#        pm_summary_reg_fig,
#        width = 300,
#        height = 215,
#        units = "mm",
#        dpi = 300)

